Load libraries and DE genes
library(ggplot2)
library(readxl)
library(ggrepel)
library(kableExtra)
library(tidyverse)
library(org.Hs.eg.db)
library(clusterProfiler)
library(enrichplot)
library(msigdbr)
library(ggplot2)
hs=get("org.Hs.eg.db")
MEDIAsave="/home/mclaudia/MEDIA Project/Codici_finali/"
MEDIAgraph="/home/mclaudia/MEDIA Project/Codici_finali/Paper_figures/"
load(paste0(MEDIAsave,"DE_DESeq2_SCell.RData"))
singleCell <- df.res_sc
singleCell$gene_name <-rownames(singleCell)
load(paste0(MEDIAsave,"genide-bulk-unpaired_DE.RData"))
unpaired <-OAvsnonOA
load(paste0(MEDIAsave,"results_DE_EGApaired.RData"))
paired <-df.res
load(paste0(MEDIAsave,"DEgenes_from_validation.RData"))
valid <-dff
rm(df.res,OAvsnonOA,df.res_sc)
up_genes <- read_excel(paste0(MEDIAsave,"up_dw_genes_consensus.xlsx"), sheet=1)
dw_genes <- read_excel(paste0(MEDIAsave,"up_dw_genes_consensus.xlsx"), sheet=2)
Pre-process dataset for the Enrichment Bubble Plot construction
riskGenes <-c("TNFSF11","LOXL3","DNER","TSPAN2","THBS3","ASPN","HTRA1","DYSF")
ddsc <-singleCell$gene_name[grep("\\.",singleCell$gene_name)]
ss=ddsc[-grep("^AP0.+|^RP.+-|^CT.-.+|^AC.+|^AL.|^GS1-.+|^XX.+|^LL.+",ddsc)]
singleCell <-singleCell[!singleCell$gene_name %in% c("CRYBG3.1", "SRSF10.1"),]
ss <-ss[!ss%in%c("CRYBG3.1", "SRSF10.1")]
singleCell[singleCell$gene_name %in% ss,"gene_name"] <- gsub("\\..*","",ss)
rm(ss,ddsc)
singleCell <-singleCell[,c(2,5,6)]
colnames(singleCell) <- c("log2FC","p_adj","gene_name")
singleCell$class <-"Dataset 1"
paired <-paired[,c(3,7,8)]
colnames(paired) <- c("log2FC","p_adj","gene_name")
paired$class <-"Dataset 2"
unpaired <-unpaired[,c(3,7,8)]
colnames(unpaired) <- c("log2FC","p_adj","gene_name")
unpaired$class <-"Dataset 3"
valid <-valid[,c(2,3,1)]
colnames(valid) <- c("log2FC","p_adj","gene_name")
valid$class <-"Dataset 4/Validation"
total_genes <-rbind(singleCell, paired, unpaired, valid)
rownames(total_genes) <-paste0(1:nrow(total_genes))
total_genes[1:10,]
## log2FC p_adj gene_name class
## 1 14.356062 1.319705e-73 COL1A1 Dataset 1
## 2 1.605177 1.141659e-64 ABI3BP Dataset 1
## 3 2.863385 2.653574e-64 TGFBI Dataset 1
## 4 1.685722 8.423114e-60 TSPAN2 Dataset 1
## 5 2.232388 8.576118e-53 PDLIM7 Dataset 1
## 6 12.742026 3.912155e-49 CRLF1 Dataset 1
## 7 1.863951 7.741079e-48 MT1G Dataset 1
## 8 2.173891 1.781909e-47 CRTAC1 Dataset 1
## 9 -1.184766 1.076830e-42 KLF10 Dataset 1
## 10 11.641215 4.644320e-41 CLIC3 Dataset 1
Annotation
total_genes$col <-"gray50"
total_genes[total_genes$log2FC <= -1.5 & total_genes$class %in% c("Dataset 1","Dataset 3","Dataset 4/Validation"), "col"] <- "springgreen3"
total_genes[total_genes$log2FC <= -0.8 & total_genes$class=="Dataset 2", "col"] <- "springgreen3"
total_genes[total_genes$log2FC >= 1.5 & total_genes$class %in% c("Dataset 1","Dataset 3","Dataset 4/Validation"), "col"] <- "red"
total_genes[total_genes$log2FC >= 0.8 & total_genes$class=="Dataset 2", "col"] <- "red"
total_genes[total_genes$gene_name %in% c(up_genes$gene,dw_genes$gene), "col"] <- "blue"
total_genes[total_genes$p_adj > 0.05, "col"] <- "gray50"
total_genes$label <- NA
total_genes[total_genes$gene_name %in% riskGenes, "label"] <- total_genes[total_genes$gene_name %in% riskGenes,"gene_name"]
total_genes$Shape <- "unselected"
total_genes[total_genes$gene_name %in% riskGenes, "Shape"] <- "selected risk.sc."
total_genes$ss <-1
total_genes[total_genes$gene_name %in% riskGenes, "ss"] <- 3
Volcano Plots
ggplot(data=total_genes, aes(x=log2FC, y=-log10(p_adj), color=col,label=label, shape=Shape)) +
geom_point(size=total_genes$ss) +
geom_label_repel(size=5,
min.segment.length = 0,direction = "both",
max.overlaps =40,
color="black",
nudge_x=1,
segment.linetype=2) +
scale_shape_manual(values=c(8, 20))+
scale_color_manual(values = c("red","gray50","springgreen3","blue"),
breaks = c("red","gray50","springgreen3","blue"),
labels = c("UP","notDE","DOWN","consensus sig.")) +
labs(color="Legend")+
geom_hline(yintercept=-log10(0.05), col="gray", linetype=2)+
theme_bw()+
facet_wrap(.~class, scales = "free_y", ncol=2)+
theme(strip.text = element_text(size = 18, face = "bold"),
legend.title=element_text(size=16),
legend.text=element_text(size=14),
axis.title=element_text(size=15))+
xlab("log2(FC)")+
ylab("-log10(FDR)")+
guides(color = guide_legend(override.aes = list(size = 4)),
shape = guide_legend(override.aes = list(size = 4)))

GSEA: GO BP and REACTOME analyses, select processes commonly
enriched by all the datasets
genes1 <-as.numeric(total_genes[total_genes$class=="Dataset 1",1])
names(genes1) <-total_genes[total_genes$class=="Dataset 1",3]
genes2 <-as.numeric(total_genes[total_genes$class=="Dataset 2",1])
names(genes2) <-total_genes[total_genes$class=="Dataset 2",3]
genes3 <-as.numeric(total_genes[total_genes$class=="Dataset 3",1])
names(genes3) <-total_genes[total_genes$class=="Dataset 3",3]
list_genes <-list(genes1, genes2, genes3)
list_obj <-vector("list",3)
names(list_obj) <-c("gsebp1","gsebp2","gsebp3")
list_objR <-vector("list",3)
names(list_objR) <-c("ans1","ans2","ans3")
rc <-msigdbr(species = "Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
sig <-rc[,c(3,4)]
sig$gs_name <-gsub("^REACTOME_","",sig$gs_name)
for(i in 1:length(list_genes)){
x <-sort(list_genes[[i]], decreasing = TRUE)
gsebp <- gseGO(geneList=x,
ont ="BP",
keyType = "SYMBOL",
pvalueCutoff = 0.15,
verbose = TRUE,
minGSSize = 5,
OrgDb = hs,
pAdjustMethod = "fdr")
list_obj[[i]] <-gsebp
gsreac <- clusterProfiler::GSEA(x,TERM2GENE = sig,
minGSSize =5, pAdjustMethod = "fdr",
pvalueCutoff =0.15)
list_objR[[i]] <-gsreac
}
rb1=list_obj[[1]]@result
rb1$class <-"Dataset 1"
rb2=list_obj[[2]]@result
rb2$class <-"Dataset 2"
rb3=list_obj[[3]]@result
rb3$class <-"Dataset 3"
rc1=list_objR[[1]]@result
rc1$class <-"Dataset 1"
rc2=list_objR[[2]]@result
rc2$class <-"Dataset 2"
rc3=list_objR[[3]]@result
rc3$class <-"Dataset 3"
table_allBP <-rbind(rb1,rb2,rb3)
table_allBP <-table_allBP[table_allBP$NES > 0,]
table_allRc <-rbind(rc1,rc2,rc3)
table_allRc <-table_allRc[table_allRc$NES > 0,]
sell <-names(table(table_allBP$ID)[table(table_allBP$ID)==3])
sellr <-names(table(table_allRc$ID)[table(table_allRc$ID)==3])
sel_pb <-table_allBP[table_allBP$ID %in% sell,]
sel_rc <-table_allRc[table_allRc$ID %in% sellr,]
sel_pb <-sel_pb[,c(2,5,7,12)]
sel_rc <-sel_rc[,c(1,5,7,12)]
Bubble plot of common enriched processes
ggplot(sel_pb, aes(y =class , x =Description)) +
ggtitle("GO: Biological Processes enriched in all the three experiments") +
geom_point(data=sel_pb,aes(y =class , x =Description, size = NES, colour = -log(p.adjust)), alpha=.8)+
scale_color_gradient(low="blue",high="red")+
theme_bw()+
theme(plot.title = element_text(face="bold",size=16),plot.margin = unit(c(2,1,1,2), "cm"),
axis.text.x = element_text(angle = 60, vjust = 1, hjust=1, size=15, face="bold"),
axis.text.y = element_text(size=15, face="bold"),
legend.title=element_text(size=16),
legend.text=element_text(size=14))+
labs(x="", y="", size="NES", color="-log10(FDR)")

ggplot(sel_rc, aes(y =class , x =ID)) +
ggtitle("REACTOME Pathways enriched in all the three experiments") +
geom_point(data=sel_rc,aes(y =class , x =ID, size = NES, colour = -log(p.adjust)), alpha=.8)+
scale_color_gradient(low="blue",high="red")+
theme_bw()+
theme(plot.title = element_text(face="bold", size=16),plot.margin = unit(c(5,1,1,5), "cm"),
axis.text.x = element_text(angle = 60, vjust = 1, hjust=1, size=11, face="bold"),
axis.text.y = element_text(size=13, face="bold"),
legend.title=element_text(size=16),
legend.text=element_text(size=14))+
labs(x="", y="", size="NES", color="-log10(FDR)")
